Methods

Load forecasting “score card”

  • Produced via Generate_score_cards.Rmd for January 1, 2021- February 1, 2022
  • Set dates for alpha, delta, and omicron analysis periods

Scaling by baseline

We scale each score, per location and forecast date, by the ensemble model score; then we take the mean or median.

Important note on the order of operations here: scale then aggregate. The other way around: aggregate then scale, would be a simple post-adjustment applied to the metrics we computed earlier. This way: scale then aggregate, results in a different final metric altogether. It is potentially interesting as it provides a nonparametric spatiotemporal adjustment; assuming that space and time effects are multiplicative, we’re directly “canceling them out” by taking ratios.

scale_df = function(df, var= "ae", base_forecaster = "CA-baseline") {
  df %>% select(-c(forecast_date)) %>%
    distinct() %>%
    pivot_wider(id_cols = c(geo_value, target_end_date, ahead),
                names_from = "forecaster", names_prefix = var, 
                values_from = var) %>%
    mutate(across(starts_with(var), ~ .x /
                !!as.symbol(paste0(var, base_forecaster)))) %>%
    pivot_longer(cols = starts_with(var), names_to = "forecaster",
                 values_to = "scaled") %>%
    mutate(forecaster = substring(forecaster, nchar(var) + 1)) %>%
    filter(forecaster != base_forecaster)
}

Pairwise tournament

We run a pairwise tournament. This is inspired by Johannes Bracher’s analysis (and similar ideas in the literature). Except, the order of operations here is different: scale then aggregate (whereas Johannes did: aggregate then scale). The motivation for this was explained above (thinking of it as providing a nonparametric spatiotemporal adjustment), as was the fact that the order of operations really makes a difference.

For each pair of forecasters \(f\) and \(g\), we compute:

\[ \theta_{fg} = A\bigg\{ \frac{S(f;\ell,d,a)}{S(g;\ell,d,a)} \;:\; \text{common locations $\ell$, forecast dates $d$, and ahead values $a$} \bigg\} \]

where \(S\) is a score of interest, say WIS, and \(A\) is an aggregator of interest, say the mean. Important note: we aggregate over all common locations, dates, and ahead values, which may differ for each pair \(f,g\). To compute an overall metric for forecaster \(f\), we use:

\[ \theta_f = \bigg( \prod_g \theta_{fg} \bigg)^{1/F}. \]

the geometric mean of all pairwise comparisons of \(f\) to other forecasters (here \(F\) is the total number of forecasters). Another interesting option would be to define \((\theta_f)_{f \in F}\) as the top left singular vector of the matrix \((\theta_{fg})_{f,g \in F}\), which we’ll also investigate.

# Define mean and median functions that deal with missingness well
Mean = function(x) mean(x, na.rm = TRUE)
Median = function(x) median(x, na.rm = TRUE)


pairwise_tournament = function(df, var, aggr = Mean) {
  forecasters = unique(df$forecaster)
  theta_mat = matrix(NA, length(forecasters), length(forecasters))
  rownames(theta_mat) = colnames(theta_mat) = forecasters
  for (f in forecasters) {
    result =  scale_df(df, var, base_forecaster = f) %>% 
      group_by(forecaster) %>%
      summarize(v = aggr(scaled))
    theta_mat[result$forecaster, f] = result$v
  }
  
  # Convert to data frame for convenience with ggplot
  theta_df = as.data.frame(theta_mat) %>%
    mutate(Forecaster1 = forecasters) %>%
    pivot_longer(cols = -Forecaster1, names_to = "Forecaster2",
                 values_to = "value")
  
  # Compute overall metrics two ways: geometric mean, SVD
  theta_vec1 = exp(rowMeans(log(theta_mat), na.rm = TRUE))
  # diag(theta_mat) = 1 # so the SVD won't fail; undo it later
  # theta_vec2 = as.numeric(svd(theta_mat, nu = 1)$u)
  # names(theta_vec2) = names(theta_vec1)
  diag(theta_mat) = NA
  
  return(list(mat = theta_mat, df = theta_df, vec1 = theta_vec1)) 
              #vec2 = theta_vec2))
}

Results

7-day MAE

Overall Rankings

theta = pairwise_tournament(score_cards_tourney, var = "norm_MAE_7day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_overall<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Whole time period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 COVID Nearterm 0.6741011
2 CA-baseline 0.7138747
3 LEMMA 0.8971657
4 Simple Growth 0.9354825
5 Ensemble 0.9648531
6 Columbia 2.5661583
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_overall<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Whole time period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_overall, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_overall, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Compare across counties for all dates

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_7day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")
# saveRDS(county_rankings, file="county_pairwise_rankings.RDS")

overall_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Whole period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure overall period

overall_col<-cowplot::plot_grid(A_overall, B_overall, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(overall_col, overall_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Alpha Rankings

alpha_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > alpha_start & forecast_date< alpha_end)

theta = pairwise_tournament(alpha_score_cards_tourney, var = "norm_MAE_7day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_alpha<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Alpha period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 CA-baseline 0.5758121
2 COVID Nearterm 0.6313067
3 LEMMA 0.8555300
4 Simple Growth 0.9217736
5 Ensemble 1.0217773
6 Columbia 3.4139994
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_alpha<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Alpha period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_alpha, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_alpha, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Alpha period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))  %>% filter(forecast_date > alpha_start & forecast_date< alpha_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_7day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county") %>% na.omit()

alpha_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Alpha period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Alpha period

alpha_col<-cowplot::plot_grid(A_alpha, B_alpha, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(alpha_col, alpha_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Delta Rankings

delta_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > delta_start & forecast_date< delta_end)

theta = pairwise_tournament(delta_score_cards_tourney, var = "norm_MAE_7day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_delta<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Delta period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 COVID Nearterm 0.5879658
2 CA-baseline 0.9027293
3 Ensemble 0.9161413
4 LEMMA 0.9437425
5 Simple Growth 0.9952081
6 Columbia 2.1895963
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_delta<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Delta period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_delta, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_delta, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Delta period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline")) %>% filter(forecast_date > delta_start & forecast_date< delta_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_7day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")

delta_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Delta period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Delta period

delta_col<-cowplot::plot_grid(A_delta, B_delta, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(delta_col, delta_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Omicron Rankings

omicron_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > omicron_start & forecast_date< omicron_end)

theta = pairwise_tournament(omicron_score_cards_tourney, var = "norm_MAE_7day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_omicron<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Omicron period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 LEMMA 0.5460847
2 CA-baseline 0.7340049
3 COVID Nearterm 0.7442003
4 Ensemble 0.9983329
5 Simple Growth 1.1752463
6 Columbia 2.8573353
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_omicron<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Omicron period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_omicron, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_omicron, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Omicron period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))  %>% filter(forecast_date > omicron_start & forecast_date< omicron_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_7day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")

omicron_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Omicron period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Omicron period

omicron_col<-cowplot::plot_grid(A_omicron, B_omicron, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(omicron_col, omicron_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

14-days

Overall Rankings

theta = pairwise_tournament(score_cards_tourney, var = "norm_MAE_14day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_overall<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Whole time period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 COVID Nearterm 0.7416021
2 CA-baseline 0.7948351
3 LEMMA 0.8954054
4 Ensemble 0.9357604
5 Simple Growth 0.9789573
6 Columbia 2.0682540
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_overall<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Whole time period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_overall, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_overall, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Compare across counties for all dates

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_14day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")
# saveRDS(county_rankings, file="county_pairwise_rankings.RDS")

overall_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2)), size=2) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Whole period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.y = element_text(size=7),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure overall period

overall_col<-cowplot::plot_grid(A_overall, B_overall, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(overall_col, overall_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Alpha Rankings

alpha_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > alpha_start & forecast_date< alpha_end)

theta = pairwise_tournament(alpha_score_cards_tourney, var = "norm_MAE_14day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_alpha<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Alpha period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 CA-baseline 0.6276144
2 COVID Nearterm 0.7090067
3 LEMMA 0.8472324
4 Ensemble 0.9708680
5 Simple Growth 1.0111709
6 Columbia 2.7019041
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_alpha<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Alpha period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_alpha, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_alpha, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Alpha period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))  %>% filter(forecast_date > alpha_start & forecast_date< alpha_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_14day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% na.omit() %>% left_join(regions, by="county")

alpha_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Alpha period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Alpha period

alpha_col<-cowplot::plot_grid(A_alpha, B_alpha, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(alpha_col, alpha_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Delta Rankings

delta_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > delta_start & forecast_date< delta_end)

theta = pairwise_tournament(delta_score_cards_tourney, var = "norm_MAE_14day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_delta<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Delta period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 COVID Nearterm 0.7315694
2 LEMMA 0.8686898
3 Ensemble 0.8847230
4 CA-baseline 0.9552858
5 Simple Growth 0.9803747
6 Columbia 1.8991013
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_delta<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Delta period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_delta, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_delta, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Delta period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline")) %>% filter(forecast_date > delta_start & forecast_date< delta_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_14day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")

delta_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Delta period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Delta period

delta_col<-cowplot::plot_grid(A_delta, B_delta, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(delta_col, delta_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Omicron Rankings

omicron_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > omicron_start & forecast_date< omicron_end)

theta = pairwise_tournament(omicron_score_cards_tourney, var = "norm_MAE_14day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_omicron<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Omicron period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 LEMMA 0.5484027
2 CA-baseline 0.7894430
3 COVID Nearterm 0.8566116
4 Ensemble 1.0478470
5 Simple Growth 1.3221465
6 Columbia 1.9463792
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_omicron<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Omicron period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_omicron, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_omicron, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Omicron period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))  %>% filter(forecast_date > omicron_start & forecast_date< omicron_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_14day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")

omicron_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Omicron period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Omicron period

omicron_col<-cowplot::plot_grid(A_omicron, B_omicron, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(omicron_col, omicron_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

21-days

Overall Rankings

theta = pairwise_tournament(score_cards_tourney, var = "norm_MAE_21day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_overall<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Whole time period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 COVID Nearterm 0.7950358
2 CA-baseline 0.8411398
3 LEMMA 0.9081730
4 Ensemble 0.9286966
5 Simple Growth 0.9698582
6 Columbia 1.8280769
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_overall<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Whole time period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_overall, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_overall, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Compare across counties for all dates

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_21day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")
# saveRDS(county_rankings, file="county_pairwise_rankings.RDS")

overall_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Whole period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure overall period

overall_col<-cowplot::plot_grid(A_overall, B_overall, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(overall_col, overall_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Alpha Rankings

alpha_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > alpha_start & forecast_date< alpha_end)

theta = pairwise_tournament(alpha_score_cards_tourney, var = "norm_MAE_21day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_alpha<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Alpha period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 CA-baseline 0.6785078
2 COVID Nearterm 0.6965888
3 LEMMA 0.8763746
4 Ensemble 0.9433604
5 Simple Growth 1.0417530
6 Columbia 2.4566136
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_alpha<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Alpha period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_alpha, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_alpha, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Alpha period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))  %>% filter(forecast_date > alpha_start & forecast_date< alpha_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_21day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county") %>% na.omit()

alpha_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Alpha period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Alpha period

alpha_col<-cowplot::plot_grid(A_alpha, B_alpha, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(alpha_col, alpha_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Delta Rankings

delta_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > delta_start & forecast_date< delta_end)

theta = pairwise_tournament(delta_score_cards_tourney, var = "norm_MAE_21day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_delta<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Delta period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1), row.names = NULL))
rank forecaster theta
1 LEMMA 0.8300522
2 COVID Nearterm 0.8703716
3 Ensemble 0.8782002
4 CA-baseline 0.9247462
5 Simple Growth 0.9391087
6 Columbia 1.8149236
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_delta<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Delta period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_delta, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_delta, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Delta period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline")) %>% filter(forecast_date > delta_start & forecast_date< delta_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_21day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")

delta_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Delta period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Delta period

delta_col<-cowplot::plot_grid(A_delta, B_delta, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(delta_col, delta_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))

Omicron Rankings

omicron_score_cards_tourney <- score_cards_tourney %>% filter(forecast_date > omicron_start & forecast_date< omicron_end)

theta = pairwise_tournament(omicron_score_cards_tourney, var = "norm_MAE_21day", aggr = 
                              function(x) median(x, na.rm = TRUE))
ranked_list = rownames(theta$mat)[order(theta$vec1)]
theta$df<- theta$df %>% mutate(log_ranks = log(value, base= 10))

colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)
B_omicron<- ggplot(theta$df, aes(x = factor(Forecaster2, levels = rev(ranked_list)),
                     y = factor(Forecaster1, levels = rev(ranked_list)))) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(value, 3))) +
  # scale_fill_gradientn(colours = colors) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    midpoint=0)+
#    midpoint = mean(range(theta$df$value, na.rm=TRUE)))+
  # ggtitle("B: Median pairwise ranking (Omicron period)")+
  labs(x = NULL, y = NULL) +
  guides(fill="none")+
  theme_classic() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Overall metric (computed via GM of pairwise metrics):
knitr::kable(data.frame(rank = 1:length(theta$vec1), forecaster = ranked_list,
                        theta = sort(theta$vec1, na.last=TRUE), row.names = NULL))
rank forecaster theta
1 COVID Nearterm 0.7189547
2 Ensemble 0.8007621
3 CA-baseline 0.9147825
4 Simple Growth 1.1952514
5 LEMMA 1.5886199
6 Columbia NaN
overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1, na.last=TRUE))), ranks=theta$vec1, log_ranks = log(theta$vec1, base= 10), y=0)

A_omicron<-ggplot(overall_ranks, aes(x = forecaster, y=y)) +
  geom_tile(aes(fill = log_ranks)) +
  geom_text(aes(label = round(ranks, 3))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("A: Overall ranking (Omicron period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))


grid.newpage()
blank_height <- 0
rel_height_lower <- 1.35
pushViewport(viewport(layout =
    grid.layout(nrow = 2, ncol = 1,
      heights = unit(
        c(1, 3),
        c("null", "null")),
      widths = unit(c(1), c("null")))))
print(A_omicron, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(B_omicron, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Counties: Omicron period

county_list<-unique(score_cards_tourney$geo_value)
gg_county_rankings<-list()

county_score_cards_tourney <- score_cards_tourney %>% filter(model %in% c("lemma", "simple", "m.proj", "columbia", "covid_nearterm", "CA-baseline"))  %>% filter(forecast_date > omicron_start & forecast_date< omicron_end)

for(i in county_list)
{
  score_cards_tourney_county<- county_score_cards_tourney %>% filter(geo_value==i)
  
  theta = pairwise_tournament(score_cards_tourney_county, var = "norm_MAE_21day", aggr = 
                              function(x) median(x, na.rm = TRUE))
  
# ranked_list = rownames(theta$mat)[order(theta$vec1)]
# colors = colorRampPalette(brewer.pal(n = 6, name = "RdBu"))(30)

overall_ranks<-data.frame(forecaster=factor(names(theta$vec1), levels=names(sort(theta$vec1, na.last=TRUE))), ranks=theta$vec1, county=i)
gg_county_rankings[[i]]<-overall_ranks

}
county_rankings<-do.call(rbind.data.frame, gg_county_rankings) %>% left_join(regions, by="county")

omicron_counties<-county_rankings %>% filter(county %notin% c("Sutter", "Sierra", "Alpine")) %>% mutate(log_ranks = log(ranks, base= 10)) %>% ggplot(aes(x = factor(forecaster, levels = ranked_list), y=as.factor(county))) +
  geom_tile(aes(fill = log_ranks)) + facet_grid(region_acronym ~ ., scale= "free")+
  geom_text(aes(label = round(ranks, 2))) +
  scale_fill_gradient2("",
    low = "#2166ac",
    mid = "#f7f7f7",
    high = "#b2182b",
    # midpoint = mean(range(overall_ranks$ranks))) +
   midpoint = 0) +
  xlab("") +
  ylab("") +
  # ggtitle("County rankings (Omicron period)") +
  theme_classic() +
  guides(fill="none")+
  theme(axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    plot.margin = unit(c(5.5, 5.5, 5.5, 55.5), "pt"))

Panel figure Omicron period

omicron_col<-cowplot::plot_grid(A_omicron, B_omicron, labels="AUTO", ncol=1, rel_heights = c(1,2.5))
cowplot::plot_grid(omicron_col, omicron_counties, labels=c("", "C"), ncol=2, rel_widths=c(1.2, 1))